registerDoFuture()
n_cores <- parallel::detectCores() - 1
plan(
strategy = cluster,
workers = parallel::makeCluster(n_cores)
)The purpose of this document is to illustrate time series analysis and forecasting. We will use a simulated dataset to analyze things like visits, discharges and payments. To perform these analyses we will be following the modeltime workflow. This report will be broken down into sections that follow that same workflow.
Let's take a look at our data and see what it contains.
df_tbl %>%
glimpse()## Rows: 25,279
## Columns: 12
## $ mrn <chr> "66727914", "84487881", "68427598", "39652414"~
## $ visit_id <chr> "1283065398", "1171004549", "1951203647", "149~
## $ visit_start_date_time <dttm> 2011-12-26 01:14:00, 2011-12-31 05:00:00, 201~
## $ visit_end_date_time <dttm> 2012-01-01 12:06:00, 2012-01-01 12:51:00, 201~
## $ total_charge_amount <dbl> 62580.61, 38466.48, 31758.50, 14699.61, 66096.~
## $ total_adjustment_amount <dbl> -39117.58, -26930.67, -23706.23, -10841.80, -7~
## $ total_payment_amount <dbl> -23463.03, -11535.81, -8052.27, -3857.81, -587~
## $ payer_grouping <chr> "Commercial", "Blue Cross", "Blue Cross", "Blu~
## $ service_line <chr> "Medical", "Surgical", "Medical", "Chest Pain"~
## $ ip_op_flag <chr> "I", "I", "I", "I", "I", "O", "I", "I", "O", "~
## $ adm_date <date> 2011-12-26, 2011-12-31, 2011-12-28, 2011-12-3~
## $ dsch_date <date> 2012-01-01, 2012-01-01, 2012-01-01, 2012-01-0~
skim(df_tbl)| Name | df_tbl |
| Number of rows | 25279 |
| Number of columns | 12 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| Date | 2 |
| numeric | 3 |
| POSIXct | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| mrn | 0 | 1 | 8 | 8 | 0 | 16789 | 0 |
| visit_id | 0 | 1 | 10 | 10 | 0 | 25279 | 0 |
| payer_grouping | 0 | 1 | 10 | 10 | 0 | 2 | 0 |
| service_line | 0 | 1 | 2 | 44 | 0 | 27 | 0 |
| ip_op_flag | 0 | 1 | 1 | 1 | 0 | 2 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| adm_date | 0 | 1 | 2011-12-19 | 2019-12-31 | 2015-05-27 | 2916 |
| dsch_date | 0 | 1 | 2012-01-01 | 2019-12-31 | 2015-05-29 | 2887 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| total_charge_amount | 0 | 1.00 | 34260.35 | 48285.83 | 0.5 | 10847.83 | 19475.11 | 39463.87 | 1109001.99 | ▇▁▁▁▁ |
| total_adjustment_amount | 0 | 1.00 | -22550.46 | 36053.24 | -914728.0 | -25220.65 | -11619.57 | -6951.65 | 63627.62 | ▁▁▁▁▇ |
| total_payment_amount | 586 | 0.98 | -11584.37 | 18165.44 | -495119.8 | -13132.33 | -5999.98 | -2920.08 | 436.01 | ▁▁▁▁▇ |
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| visit_start_date_time | 0 | 1 | 2011-12-19 21:33:00 | 2019-12-31 05:00:00 | 2015-05-27 01:55:00 | 16731 |
| visit_end_date_time | 0 | 1 | 2012-01-01 12:06:00 | 2019-12-31 22:58:00 | 2015-05-29 00:00:00 | 15184 |
Our objectives are to:
Our forecasting will focus on a grouped forecast where we are going to forecast the number of discharges by inpatient/outpatient visit type and by payer grouping.
We are going to do this on a weekly scale.
df_tblsummarise_by_time() with .by = "week", and n() the visits.transactions_weekly_tbltransactions_weekly_tbl <- df_tbl %>%
# Drop the unknown payer bucket before grouping
filter(payer_grouping != "?") %>%
# Build one series id per inpatient/outpatient x payer combination
mutate(id = str_c(ip_op_flag, payer_grouping, sep = "_")) %>%
mutate(id = as_factor(id)) %>%
group_by(id) %>%
# Aggregate visit counts to weekly discharge totals per series
summarise_by_time(
.date_var = dsch_date
, .by = "week"
, value = n()
) %>%
ungroup()
transactions_weekly_tbl## # A tibble: 1,667 x 3
## id dsch_date value
## <fct> <date> <int>
## 1 I_Commercial 2012-01-01 13
## 2 I_Commercial 2012-01-08 14
## 3 I_Commercial 2012-01-15 14
## 4 I_Commercial 2012-01-22 9
## 5 I_Commercial 2012-01-29 8
## 6 I_Commercial 2012-02-05 9
## 7 I_Commercial 2012-02-12 9
## 8 I_Commercial 2012-02-19 10
## 9 I_Commercial 2012-02-26 24
## 10 I_Commercial 2012-03-04 6
## # ... with 1,657 more rows
Use plot_time_series() to visualize the discharges.
log() transformation to see the effect on the time seriestransactions_weekly_tbl %>%
plot_time_series(
.date_var = dsch_date
, .color_var = id
, .facet_vars = id
, .facet_ncol = 2
, .value = log(value)
, .smooth = FALSE
, .interactive = FALSE
)Visualize the ACF using plot_acf_diagnostics() using a log() transformation. Look for:
transactions_weekly_tbl %>%
group_by(id) %>%
plot_acf_diagnostics(dsch_date, log(value))transactions_weekly_tbllog()standardize_vec()transactions_trans_weekly_tbltransactions_trans_weekly_tbl <- transactions_weekly_tbl %>%
group_by(id) %>%
mutate(value = log(value)) %>%
mutate(value = standardize_vec(value)) %>%
ungroup()## Standardization Parameters
## mean: 1.83577890003612
## standard deviation: 0.545791389303644
## Standardization Parameters
## mean: 3.08875144281386
## standard deviation: 0.367674566335952
## Standardization Parameters
## mean: 3.15330156564258
## standard deviation: 0.302421031976675
## Standardization Parameters
## mean: 1.59951348649452
## standard deviation: 0.514947645076106
# Standardization parameters printed by standardize_vec() above, captured so
# the forecast can be inverted back to the original scale later.
# NOTE(review): suffixes a-d are assumed to follow the order in which the
# per-group messages printed (the id factor-level order: presumably
# I_Commercial, I_Blue Cross, O_Blue Cross, O_Commercial) -- TODO confirm.
mean_b <- 3.08875144281386
sd_b <- 0.367674566335952
mean_a <- 1.83577890003612
sd_a <- 0.545791389303644
mean_c <- 3.15330156564258
sd_c <- 0.302421031976675
mean_d <- 1.59951348649452
sd_d <- 0.514947645076106Visualize the log-standardized transactions using plot_time_series(). This confirms the transformation was performed successfully.
transactions_trans_weekly_tbl %>%
plot_time_series(
.date_var = dsch_date
, .color_var = id
, .facet_vars = id
, .facet_ncol = 2
, .value = value
, .smooth = FALSE
)We’ll use these parameters to create our “full dataset”. We’ve selected a 14-week forecast horizon. Our lag period is 14 weeks, and we’ll try out a few rolling averages at various aggregations.
# Forecast horizon: predict 14 weeks ahead
horizon <- 14
# Lag period equals the horizon so lag features are fully known
# across the entire forecast window
lag_period <- 14
rolling_periods <- c(7, 14, 28, 52)transactions_weekly_tblbind_rows() and future_frame() to extend the data frame .length_out = horizon.tk_augment_lags() to add a .lags = lag_periodtk_agument_slidify() to add .period = rolling_periods. Use mean as the rolling function. Make sure to “center” with “partial” windows.full_tbl.full_tbl <- transactions_trans_weekly_tbl %>%
# Add future window: extend each series `horizon` weeks past its last date
group_by(id) %>%
bind_rows(
future_frame(
.data = .
, .date_var = dsch_date
, .length_out = horizon
)
) %>%
# Add autocorrelated lags (value from `lag_period` weeks back)
tk_augment_lags(value, .lags = lag_period) %>%
# Add rolling-mean features over the lagged value at several window widths
tk_augment_slidify(
.value = value_lag14,
.f = mean,
.period = rolling_periods,
.align = "center",
.partial = TRUE
) %>%
# Rename columns so every engineered feature carries a "lag_" prefix
rename_with(
.cols = contains("lag")
, .fn = ~ str_c("lag_", .)
) %>%
ungroup() %>%
# Put the date column first for readability
select(dsch_date, everything())
full_tbl %>%
glimpse()## Rows: 1,723
## Columns: 8
## $ dsch_date <date> 2012-01-01, 2012-01-08, 2012-01-15, 2012-01-2~
## $ id <fct> I_Commercial, I_Commercial, I_Commercial, I_Co~
## $ value <dbl> 1.33598747, 1.47176823, 1.47176823, 0.66224144~
## $ lag_value_lag14 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ lag_value_lag14_roll_7 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ lag_value_lag14_roll_14 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ lag_value_lag14_roll_28 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ lag_value_lag14_roll_52 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
Visualize the features, and review what you see.
Using full_tbl, pivot_longer() every column except "dsch_date", then use plot_time_series() to visualize the series, coloring by "name". Review the visualization, selecting one feature at a time, and answer the following questions:
- Do the rolling lags present any issues?
- Which rolling lag captures the trend the best?
- Do you expect either of the Product Events features to help?
full_tbl %>%
pivot_longer(cols = -c(dsch_date, id)) %>%
group_by(id) %>%
plot_time_series(
dsch_date
, value
, name
, .smooth = FALSE
, .facet_ncol = 2
)Create a data_prepared_tbl by filtering full_tbl where “value” is non-missing.
# Historical rows only: keep observations where the target value is known
data_prepared_tbl <- filter(full_tbl, !is.na(value))
data_prepared_tbl## # A tibble: 1,667 x 8
## dsch_date id value lag_value_lag14 lag_value_lag14~ lag_value_lag14~
## <date> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 2012-01-01 I_Commercial 1.34 NA NA NA
## 2 2012-01-08 I_Commercial 1.47 NA NA NA
## 3 2012-01-15 I_Commercial 1.47 NA NA NA
## 4 2012-01-22 I_Commercial 0.662 NA NA NA
## 5 2012-01-29 I_Commercial 0.446 NA NA NA
## 6 2012-02-05 I_Commercial 0.662 NA NA NA
## 7 2012-02-12 I_Commercial 0.662 NA NA NA
## 8 2012-02-19 I_Commercial 0.855 NA NA NA
## 9 2012-02-26 I_Commercial 2.46 NA NA NA
## 10 2012-03-04 I_Commercial -0.0807 NA NA NA
## # ... with 1,657 more rows, and 2 more variables:
## # lag_value_lag14_roll_28 <dbl>, lag_value_lag14_roll_52 <dbl>
Create a forecast_tbl by filtering full_tbl where “value” is missing.
# Future rows only: the extended window where the target is still missing
forecast_tbl <- filter(full_tbl, is.na(value))
forecast_tbl## # A tibble: 56 x 8
## dsch_date id value lag_value_lag14 lag_value_lag14~ lag_value_lag14~
## <date> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 2020-01-05 I_Commercial NA -2.09 -0.645 -0.101
## 2 2020-01-12 I_Commercial NA -0.824 -0.597 -0.131
## 3 2020-01-19 I_Commercial NA -1.35 -0.663 -0.146
## 4 2020-01-26 I_Commercial NA -0.0807 -0.434 -0.310
## 5 2020-02-02 I_Commercial NA -0.0807 0.0559 -0.266
## 6 2020-02-09 I_Commercial NA 0.202 0.0559 -0.372
## 7 2020-02-16 I_Commercial NA 1.19 0.313 -0.492
## 8 2020-02-23 I_Commercial NA 1.34 0.0251 -0.369
## 9 2020-03-01 I_Commercial NA -0.824 0.0655 -0.331
## 10 2020-03-08 I_Commercial NA 0.446 -0.0810 -0.238
## # ... with 46 more rows, and 2 more variables: lag_value_lag14_roll_28 <dbl>,
## # lag_value_lag14_roll_52 <dbl>
Use time_series_split() on data_prepared_tbl to create a single time series split.
assess = horizon to get the last 14-weeks of data as testing data.cumulative = TRUE to use all of the previous data as training data.splitssplits <- data_prepared_tbl %>%
time_series_split(assess = horizon, cumulative = TRUE)Make a preprocessing recipe using recipe(). Note - It may help to prep() and juice() your recipe to see the effect of your transformations.
recipe() using “value ~ .” and data = training(splits)step_timeseries_signature() using the date featurestep_normalize().step_dummy(). Set one_hot = TRUE.recipe_spec_base <- recipe(value ~ ., data = training(splits) %>%
arrange(id, dsch_date)) %>%
# Clean the grouping factor in place.
# NOTE(review): the original used step_mutate(ID = droplevels(id)), which
# added a *duplicate* column `ID`; step_dummy() then one-hot encoded both
# `id` and `ID`, and the redundant ID_* columns surface as NA
# ("singularity") coefficients in the lm summaries below.
step_mutate(id = droplevels(id)) %>%
# Time Series Signature: expand the date into calendar-based features
step_timeseries_signature(dsch_date) %>%
# Drop signature columns that carry no information at weekly granularity
step_rm(matches("(iso)|(xts)|(hour)|(minute)|(second)|(am.pm)")) %>%
# Standardization of trend / seasonal numeric features
step_normalize(matches("(index.num)|(year)|(yday)")) %>%
step_normalize(all_numeric_predictors()) %>%
# Near Zero Variance: drop numeric predictors with almost no variation
step_nzv(all_numeric_predictors()) %>%
# Dummy Encoding (One Hot Encoding) of the remaining nominal variables
step_dummy(all_nominal(), one_hot = TRUE)
# Fourier - 7 Week ACF
#step_fourier(dsch_date, period = c(7, 14, 52), K = 2)
recipe_spec_base %>%
prep() %>%
juice() %>%
glimpse()## Rows: 1,612
## Columns: 49
## $ dsch_date <date> 2012-01-01, 2012-01-08, 2012-01-15, 2012-01-2~
## $ lag_value_lag14 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ lag_value_lag14_roll_7 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ lag_value_lag14_roll_14 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ lag_value_lag14_roll_28 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ lag_value_lag14_roll_52 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ value <dbl> 1.33598747, 1.47176823, 1.47176823, 0.66224144~
## $ dsch_date_index.num <dbl> -1.725001, -1.716430, -1.707859, -1.699288, -1~
## $ dsch_date_year <dbl> -1.505993, -1.505993, -1.505993, -1.505993, -1~
## $ dsch_date_half <dbl> -0.9739739, -0.9739739, -0.9739739, -0.9739739~
## $ dsch_date_quarter <dbl> -1.3235094, -1.3235094, -1.3235094, -1.3235094~
## $ dsch_date_month <dbl> -1.5780833, -1.5780833, -1.5780833, -1.5780833~
## $ dsch_date_day <dbl> -1.66112036, -0.86593615, -0.07075194, 0.72443~
## $ dsch_date_mday <dbl> -1.66112036, -0.86593615, -0.07075194, 0.72443~
## $ dsch_date_qday <dbl> -1.7148451, -1.4496234, -1.1844017, -0.9191799~
## $ dsch_date_yday <dbl> -1.70440843, -1.63725147, -1.57009451, -1.5029~
## $ dsch_date_mweek <dbl> 1.8076364, -1.3374363, -0.5511681, 0.2351000, ~
## $ dsch_date_week <dbl> -1.673800698, -1.606700483, -1.539600269, -1.4~
## $ dsch_date_week2 <dbl> 0.9935073, -1.0059107, 0.9935073, -1.0059107, ~
## $ dsch_date_week3 <dbl> -0.008424679, 1.226173687, -1.243023044, -0.00~
## $ dsch_date_week4 <dbl> -0.4447431, 0.4525359, 1.3498149, -1.3420222, ~
## $ dsch_date_mday7 <dbl> -1.4145519, -0.6400079, 0.1345362, 0.9090802, ~
## $ id_I_Commercial <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ id_I_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ id_O_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ id_O_Commercial <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ ID_I_Commercial <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ ID_I_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ ID_O_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ ID_O_Commercial <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_01 <dbl> 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_02 <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_03 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0~
## $ dsch_date_month.lbl_04 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1~
## $ dsch_date_month.lbl_05 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_06 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_07 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_08 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_wday.lbl_1 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ dsch_date_wday.lbl_2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_wday.lbl_3 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_wday.lbl_4 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_wday.lbl_5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_wday.lbl_6 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_wday.lbl_7 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
Use plot_time_series_regression to test out several natural splines:
splines::ns() with degrees of freedom 1, 2, 3, and 4.Which value of df would you select?
data_prepared_tbl %>%
group_by(id) %>%
plot_time_series_regression(
.date_var = dsch_date,
.formula = value ~ splines::ns(dsch_date, df = 3),
.show_summary = FALSE,
.facet_ncol = 2
)Create a model specification for linear regression:
linear_reg() functionset_engine("lm")model_spec_lmmodel_spec_lm <- linear_reg() %>%
set_engine("lm")Create a recipe for the spline model.
recipe_spec_basedeg_free = 3recipe_spec_1_splinerecipe_spec_1_spline <- recipe_spec_base %>%
# Remove the raw date; its information now lives in the signature features
step_rm(dsch_date) %>%
# Natural spline on the (normalized) date index to model long-term trend
step_ns(ends_with("index.num"), deg_free = 3) %>%
# The spline recipe excludes the lag features entirely
step_rm(starts_with("lag_"))
recipe_spec_1_spline %>%
prep() %>%
juice() %>%
glimpse()## Rows: 1,612
## Columns: 45
## $ value <dbl> 1.33598747, 1.47176823, 1.47176823, 0.6622414~
## $ dsch_date_year <dbl> -1.505993, -1.505993, -1.505993, -1.505993, -~
## $ dsch_date_half <dbl> -0.9739739, -0.9739739, -0.9739739, -0.973973~
## $ dsch_date_quarter <dbl> -1.3235094, -1.3235094, -1.3235094, -1.323509~
## $ dsch_date_month <dbl> -1.5780833, -1.5780833, -1.5780833, -1.578083~
## $ dsch_date_day <dbl> -1.66112036, -0.86593615, -0.07075194, 0.7244~
## $ dsch_date_mday <dbl> -1.66112036, -0.86593615, -0.07075194, 0.7244~
## $ dsch_date_qday <dbl> -1.7148451, -1.4496234, -1.1844017, -0.919179~
## $ dsch_date_yday <dbl> -1.70440843, -1.63725147, -1.57009451, -1.502~
## $ dsch_date_mweek <dbl> 1.8076364, -1.3374363, -0.5511681, 0.2351000,~
## $ dsch_date_week <dbl> -1.673800698, -1.606700483, -1.539600269, -1.~
## $ dsch_date_week2 <dbl> 0.9935073, -1.0059107, 0.9935073, -1.0059107,~
## $ dsch_date_week3 <dbl> -0.008424679, 1.226173687, -1.243023044, -0.0~
## $ dsch_date_week4 <dbl> -0.4447431, 0.4525359, 1.3498149, -1.3420222,~
## $ dsch_date_mday7 <dbl> -1.4145519, -0.6400079, 0.1345362, 0.9090802,~
## $ id_I_Commercial <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ id_I_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ id_O_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ id_O_Commercial <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ ID_I_Commercial <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ ID_I_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ ID_O_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ ID_O_Commercial <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_month.lbl_01 <dbl> 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_month.lbl_02 <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_month.lbl_03 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, ~
## $ dsch_date_month.lbl_04 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ~
## $ dsch_date_month.lbl_05 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_month.lbl_06 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_month.lbl_07 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_month.lbl_08 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_month.lbl_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_month.lbl_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_month.lbl_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_month.lbl_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_wday.lbl_1 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ dsch_date_wday.lbl_2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_wday.lbl_3 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_wday.lbl_4 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_wday.lbl_5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_wday.lbl_6 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_wday.lbl_7 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ dsch_date_index.num_ns_1 <dbl> 0.000000000, -0.001897055, -0.003793515, -0.0~
## $ dsch_date_index.num_ns_2 <dbl> 0.00000000, 0.00567732, 0.01135410, 0.0170297~
## $ dsch_date_index.num_ns_3 <dbl> 0.000000000, -0.003780196, -0.007560030, -0.0~
Create a workflow for the linear regression and preprocessing recipe:
workflow()add_model() to add the model_spec_lmadd_recipe() to add the recipe_spec_1_splineworkflow_fit_lm_1_splineworkflow_fit_lm_1_spline <- workflow() %>%
add_model(model_spec_lm) %>%
add_recipe(recipe_spec_1_spline) %>%
fit(training(splits))
workflow_fit_lm_1_spline %>%
# extract_fit_parsnip() supersedes the deprecated pull_workflow_fit()
extract_fit_parsnip() %>%
pluck("fit") %>%
summary()##
## Call:
## stats::lm(formula = ..y ~ ., data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2771 -0.4676 0.0860 0.6183 2.1711
##
## Coefficients: (17 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.953e+01 1.708e+02 0.524 0.600196
## dsch_date_year 5.111e+01 9.903e+01 0.516 0.605883
## dsch_date_half 2.994e-01 2.065e-01 1.450 0.147173
## dsch_date_quarter -3.649e+01 1.866e+01 -1.955 0.050729 .
## dsch_date_month 3.018e+01 2.019e+01 1.495 0.135147
## dsch_date_day 2.816e+00 1.754e+00 1.605 0.108626
## dsch_date_mday NA NA NA NA
## dsch_date_qday -9.637e+00 4.905e+00 -1.965 0.049618 *
## dsch_date_yday 1.506e+01 9.287e+00 1.622 0.105104
## dsch_date_mweek -1.139e-01 3.306e-02 -3.445 0.000587 ***
## dsch_date_week -1.633e+00 1.640e+00 -0.996 0.319490
## dsch_date_week2 -1.490e-02 2.431e-02 -0.613 0.539888
## dsch_date_week3 -3.339e-02 2.211e-02 -1.510 0.131265
## dsch_date_week4 -9.802e-03 2.432e-02 -0.403 0.686953
## dsch_date_mday7 -1.105e-01 1.079e-01 -1.024 0.306218
## id_I_Commercial 3.174e-02 6.153e-02 0.516 0.606042
## id_I_Blue.Cross 6.147e-02 6.145e-02 1.000 0.317274
## id_O_Blue.Cross 3.977e-02 6.145e-02 0.647 0.517628
## id_O_Commercial NA NA NA NA
## ID_I_Commercial NA NA NA NA
## ID_I_Blue.Cross NA NA NA NA
## ID_O_Blue.Cross NA NA NA NA
## ID_O_Commercial NA NA NA NA
## dsch_date_month.lbl_01 -1.163e+00 4.814e-01 -2.416 0.015812 *
## dsch_date_month.lbl_02 -6.775e-01 3.053e-01 -2.219 0.026599 *
## dsch_date_month.lbl_03 -1.211e+00 5.331e-01 -2.272 0.023242 *
## dsch_date_month.lbl_04 -4.380e-01 3.138e-01 -1.396 0.162967
## dsch_date_month.lbl_05 -5.685e-01 2.498e-01 -2.275 0.023016 *
## dsch_date_month.lbl_06 NA NA NA NA
## dsch_date_month.lbl_07 -6.716e-01 4.615e-01 -1.455 0.145774
## dsch_date_month.lbl_08 -2.229e-01 2.503e-01 -0.891 0.373310
## dsch_date_month.lbl_09 NA NA NA NA
## dsch_date_month.lbl_10 -3.300e-01 2.752e-01 -1.199 0.230711
## dsch_date_month.lbl_11 NA NA NA NA
## dsch_date_month.lbl_12 NA NA NA NA
## dsch_date_wday.lbl_1 NA NA NA NA
## dsch_date_wday.lbl_2 NA NA NA NA
## dsch_date_wday.lbl_3 NA NA NA NA
## dsch_date_wday.lbl_4 NA NA NA NA
## dsch_date_wday.lbl_5 NA NA NA NA
## dsch_date_wday.lbl_6 NA NA NA NA
## dsch_date_wday.lbl_7 NA NA NA NA
## dsch_date_index.num_ns_1 -1.021e+02 1.955e+02 -0.523 0.601345
## dsch_date_index.num_ns_2 -2.084e+02 3.998e+02 -0.521 0.602267
## dsch_date_index.num_ns_3 -1.441e+02 2.779e+02 -0.518 0.604226
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8723 on 1584 degrees of freedom
## Multiple R-squared: 0.2182, Adjusted R-squared: 0.2048
## F-statistic: 16.37 on 27 and 1584 DF, p-value: < 2.2e-16
Create a recipe for the lag model.
recipe_spec_baserecipe_spec_2_lagrecipe_spec_2_lag <- recipe_spec_base %>%
# Remove the raw date; signature features already encode it
step_rm(dsch_date) %>%
# Drop rows whose lag features are NA (first `lag_period` weeks per series)
step_naomit(starts_with("lag_"))
recipe_spec_2_lag %>%
prep() %>%
juice() %>%
glimpse()## Rows: 1,456
## Columns: 48
## $ lag_value_lag14 <dbl> 1.3418737, 0.1755650, 1.0271333, 1.8472998, 1.~
## $ lag_value_lag14_roll_7 <dbl> 1.5609203, 2.0248939, 1.8340084, 1.7842654, 1.~
## $ lag_value_lag14_roll_14 <dbl> 1.990609, 1.957860, 1.722229, 1.742142, 1.7838~
## $ lag_value_lag14_roll_28 <dbl> 2.037706, 2.012624, 2.073307, 2.057663, 2.0367~
## $ lag_value_lag14_roll_52 <dbl> 2.337295, 2.266172, 2.266172, 2.214577, 2.2419~
## $ value <dbl> 2.1252687, 0.6622414, 1.3359875, 1.5981771, 1.~
## $ dsch_date_index.num <dbl> -1.390734, -1.382163, -1.373592, -1.365021, -1~
## $ dsch_date_year <dbl> -1.505993, -1.505993, -1.505993, -1.505993, -1~
## $ dsch_date_half <dbl> 1.0260846, 1.0260846, 1.0260846, 1.0260846, 1.~
## $ dsch_date_quarter <dbl> 0.4914605, 1.3989455, 1.3989455, 1.3989455, 1.~
## $ dsch_date_month <dbl> 0.7686605, 1.0620034, 1.0620034, 1.0620034, 1.~
## $ dsch_date_day <dbl> 1.6332142, -0.9795339, -0.1843497, 0.6108345, ~
## $ dsch_date_mday <dbl> 1.6332142, -0.9795339, -0.1843497, 0.6108345, ~
## $ dsch_date_qday <dbl> 1.73303742, -1.48751223, -1.22229049, -0.95706~
## $ dsch_date_yday <dbl> 0.9147130, 0.9818700, 1.0490270, 1.1161839, 1.~
## $ dsch_date_mweek <dbl> 1.8076364, -1.3374363, -0.5511681, 0.2351000, ~
## $ dsch_date_week <dbl> 0.9431077, 1.0102079, 1.0773081, 1.1444083, 1.~
## $ dsch_date_week2 <dbl> -1.0059107, 0.9935073, -1.0059107, 0.9935073, ~
## $ dsch_date_week3 <dbl> -0.008424679, 1.226173687, -1.243023044, -0.00~
## $ dsch_date_week4 <dbl> -1.3420222, -0.4447431, 0.4525359, 1.3498149, ~
## $ dsch_date_mday7 <dbl> 1.6836243, -0.6400079, 0.1345362, 0.9090802, 1~
## $ id_I_Commercial <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ id_I_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ id_O_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ id_O_Commercial <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ ID_I_Commercial <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ ID_I_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ ID_O_Blue.Cross <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ ID_O_Commercial <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1~
## $ dsch_date_month.lbl_02 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_03 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_04 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_05 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_06 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_07 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_08 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_09 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_10 <dbl> 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_11 <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_month.lbl_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0~
## $ dsch_date_wday.lbl_1 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ dsch_date_wday.lbl_2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_wday.lbl_3 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_wday.lbl_4 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_wday.lbl_5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_wday.lbl_6 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ dsch_date_wday.lbl_7 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
Save the workflow as workflow_fit_lm_2_lag.
# Fit the same linear model, this time with the lag-feature recipe,
# on the training split
workflow_fit_lm_2_lag <- workflow() %>%
add_recipe(recipe_spec_2_lag) %>%
add_model(model_spec_lm) %>%
fit(training(splits))
workflow_fit_lm_2_lag %>%
# extract_fit_parsnip() supersedes the deprecated pull_workflow_fit()
extract_fit_parsnip() %>%
pluck("fit") %>%
summary()##
## Call:
## stats::lm(formula = ..y ~ ., data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4820 -0.4284 0.0850 0.5245 2.2623
##
## Coefficients: (17 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.531631 0.254007 2.093 0.03653 *
## lag_value_lag14 0.022437 0.026699 0.840 0.40083
## lag_value_lag14_roll_7 -0.007029 0.063196 -0.111 0.91145
## lag_value_lag14_roll_14 -0.721999 0.098321 -7.343 3.49e-13 ***
## lag_value_lag14_roll_28 1.142943 0.129098 8.853 < 2e-16 ***
## lag_value_lag14_roll_52 0.038481 0.091889 0.419 0.67544
## dsch_date_index.num 27.786942 93.132260 0.298 0.76547
## dsch_date_year -27.755549 93.189522 -0.298 0.76587
## dsch_date_half 0.357460 0.238525 1.499 0.13419
## dsch_date_quarter -48.946998 22.679797 -2.158 0.03108 *
## dsch_date_month 44.430226 23.169945 1.918 0.05536 .
## dsch_date_day 4.041183 2.013180 2.007 0.04490 *
## dsch_date_mday NA NA NA NA
## dsch_date_qday -12.883762 5.960788 -2.161 0.03083 *
## dsch_date_yday 2.068467 8.836445 0.234 0.81495
## dsch_date_mweek -0.102330 0.033380 -3.066 0.00221 **
## dsch_date_week -0.192245 1.271154 -0.151 0.87981
## dsch_date_week2 -0.009719 0.023374 -0.416 0.67762
## dsch_date_week3 -0.038572 0.021282 -1.812 0.07012 .
## dsch_date_week4 -0.014311 0.023396 -0.612 0.54084
## dsch_date_mday7 -0.142337 0.102563 -1.388 0.16541
## id_I_Commercial -0.038187 0.059121 -0.646 0.51844
## id_I_Blue.Cross -0.090549 0.059173 -1.530 0.12618
## id_O_Blue.Cross -0.003852 0.059084 -0.065 0.94803
## id_O_Commercial NA NA NA NA
## ID_I_Commercial NA NA NA NA
## ID_I_Blue.Cross NA NA NA NA
## ID_O_Blue.Cross NA NA NA NA
## ID_O_Commercial NA NA NA NA
## dsch_date_month.lbl_01 -1.260557 0.535502 -2.354 0.01871 *
## dsch_date_month.lbl_02 -0.788813 0.330841 -2.384 0.01724 *
## dsch_date_month.lbl_03 -1.693817 0.656871 -2.579 0.01002 *
## dsch_date_month.lbl_04 -0.563333 0.319277 -1.764 0.07788 .
## dsch_date_month.lbl_05 -0.721690 0.269331 -2.680 0.00746 **
## dsch_date_month.lbl_06 NA NA NA NA
## dsch_date_month.lbl_07 -0.884854 0.502607 -1.761 0.07853 .
## dsch_date_month.lbl_08 -0.380406 0.268577 -1.416 0.15688
## dsch_date_month.lbl_09 NA NA NA NA
## dsch_date_month.lbl_10 -0.468630 0.284968 -1.644 0.10029
## dsch_date_month.lbl_11 NA NA NA NA
## dsch_date_month.lbl_12 NA NA NA NA
## dsch_date_wday.lbl_1 NA NA NA NA
## dsch_date_wday.lbl_2 NA NA NA NA
## dsch_date_wday.lbl_3 NA NA NA NA
## dsch_date_wday.lbl_4 NA NA NA NA
## dsch_date_wday.lbl_5 NA NA NA NA
## dsch_date_wday.lbl_6 NA NA NA NA
## dsch_date_wday.lbl_7 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7963 on 1425 degrees of freedom
## Multiple R-squared: 0.306, Adjusted R-squared: 0.2913
## F-statistic: 20.94 on 30 and 1425 DF, p-value: < 2.2e-16
Start by making a modeltime table:
modeltime_table() to store your fitted workflowsmodel_tblmodel_tbl <- modeltime_table(
workflow_fit_lm_1_spline,
workflow_fit_lm_2_lag
)
model_tbl## # Modeltime Table
## # A tibble: 2 x 3
## .model_id .model .model_desc
## <int> <list> <chr>
## 1 1 <workflow> LM
## 2 2 <workflow> LM
As a precautionary measure, refit the models using modeltime_refit(). This guards against models degrading over time due to software changes.
# Refitting makes sure your models work over time.
model_tbl <- model_tbl %>%
modeltime_refit(training(splits))Use testing data to calibrate the model:
model_tblmodeltime_calibrate() to calibrate the model using testing(splits) (out-of-sample data)calibration_tblcalibration_tbl <- model_tbl %>%
modeltime_calibrate(
# Score out-of-sample residuals on the hold-out test split
new_data = testing(splits)
# `id` enables local (per-series) accuracy and confidence intervals
, id = "id"
)
calibration_tbl## # Modeltime Table
## # A tibble: 2 x 5
## .model_id .model .model_desc .type .calibration_data
## <int> <list> <chr> <chr> <list>
## 1 1 <workflow> LM Test <tibble [55 x 5]>
## 2 2 <workflow> LM Test <tibble [55 x 5]>
Use modeltime_accuracy() to calculate the accuracy metrics.
calibration_tbl %>%
modeltime_accuracy(acc_by_id = FALSE) %>%
table_modeltime_accuracy(.interactive = FALSE)| Accuracy Table | ||||||||
|---|---|---|---|---|---|---|---|---|
| .model_id | .model_desc | .type | mae | mape | mase | smape | rmse | rsq |
| 1 | LM | Test | 1.07 | 120.38 | 0.83 | 146.55 | 1.4 | 0.00 |
| 2 | LM | Test | 0.82 | 126.80 | 0.64 | 111.63 | 1.2 | 0.24 |
calibration_tbl %>%
modeltime_accuracy(acc_by_id = TRUE) %>%
table_modeltime_accuracy(.interactive = FALSE)| Accuracy Table | |||||||||
|---|---|---|---|---|---|---|---|---|---|
| .model_id | .model_desc | .type | id | mae | mape | mase | smape | rmse | rsq |
| 1 | LM | Test | I_Commercial | 0.96 | 134.11 | 0.83 | 143.39 | 1.13 | 0.00 |
| 1 | LM | Test | I_Blue Cross | 1.01 | 74.17 | 1.63 | 121.20 | 1.15 | 0.10 |
| 1 | LM | Test | O_Blue Cross | 1.29 | 136.66 | 0.87 | 152.40 | 1.99 | 0.02 |
| 1 | LM | Test | O_Commercial | 1.00 | 137.83 | 1.13 | 170.97 | 1.13 | 0.00 |
| 2 | LM | Test | I_Commercial | 0.98 | 262.71 | 0.85 | 143.46 | 1.13 | 0.00 |
| 2 | LM | Test | I_Blue Cross | 0.45 | 47.45 | 0.73 | 35.96 | 0.52 | 0.17 |
| 2 | LM | Test | O_Blue Cross | 1.21 | 116.90 | 0.82 | 153.75 | 1.89 | 0.32 |
| 2 | LM | Test | O_Commercial | 0.62 | 76.53 | 0.70 | 113.50 | 0.79 | 0.12 |
modeltime_forecast():
new_data = testing(splits)actual_data = data_prepared_tblplot_modeltime_forecast()calibration_tbl %>%
modeltime_forecast(
  new_data    = testing(splits),
  # data_prepared_tbl already IS training + testing in time order, so use
  # it directly for consistency with every other forecast call in this
  # report instead of re-binding the splits.
  actual_data = data_prepared_tbl,
  conf_by_id  = TRUE
) %>%
group_by(id) %>%
plot_modeltime_forecast(.facet_ncol = 2)

Forecasting thoughts: take `calibration_tbl` and use `modeltime_refit()` to refit the model on the `data_prepared_tbl` dataset.

refit_tbl <- calibration_tbl %>%
modeltime_refit(data = data_prepared_tbl)

Take `refit_tbl` and use `modeltime_forecast()` to forecast with `new_data = forecast_tbl`. Use `data_prepared_tbl` as the actual data, then visualize with `plot_modeltime_forecast()`.

refit_tbl %>%
# Forecast the future horizon against the full prepared history
modeltime_forecast(
  new_data    = forecast_tbl,
  actual_data = data_prepared_tbl,
  conf_by_id  = TRUE
) %>%
group_by(id) %>%
plot_modeltime_forecast(.facet_ncol = 2)

Apply the inversion to the forecast plot:
# Forecast the future horizon from the refit models, then back-transform
# the standardized + log-scaled predictions to original units.
refit_tbl %>%
modeltime_forecast(new_data = forecast_tbl,
actual_data = data_prepared_tbl,
conf_by_id = TRUE) %>%
group_by(id) %>%
# Invert Transformation
# NOTE(review): `mean`/`sd` are length-4 vectors inside a grouped mutate,
# so they recycle across ROWS within each group instead of selecting the
# single mean/sd that belongs to that group's id — verify against how
# mean_a..mean_d / sd_a..sd_d were computed upstream; a per-id join of
# the standardization parameters is likely the intent.
mutate(across(.value:.conf_hi, .fns = ~ standardize_inv_vec(
x = .,
mean = c(mean_a, mean_b, mean_c, mean_d),
sd = c(sd_a, sd_b, sd_c, sd_d)
))) %>%
# Undo the log transform (assumes log was applied before standardization
# in the preprocessing recipe — TODO confirm upstream order).
mutate(across(.value:.conf_hi, .fns = exp)) %>%
plot_modeltime_forecast(.facet_ncol = 2)

workflow_fit_glmnet_2_lag <- workflow_fit_lm_2_lag %>%
# Swap the plain linear-model spec for an elastic-net (glmnet) spec,
# reusing the lag recipe already baked into the workflow
update_model(
  spec = set_engine(
    linear_reg(penalty = 0.1, mixture = 0.5),
    "glmnet"
  )
) %>%
fit(training(splits))

calibration_tbl <- modeltime_table(
workflow_fit_lm_1_spline,
workflow_fit_lm_2_lag,
workflow_fit_glmnet_2_lag
) %>%
# Name the arguments in all three calls for consistency (the original
# named `.model_id` only in the first call)
update_model_description(.model_id = 1, .new_model_desc = "LM - Spline Recipe") %>%
update_model_description(.model_id = 2, .new_model_desc = "LM - Lag Recipe") %>%
update_model_description(.model_id = 3, .new_model_desc = "GLMNET - Lag Recipe") %>%
modeltime_calibrate(testing(splits), id = "id")

calibration_tbl %>%
modeltime_accuracy(acc_by_id = FALSE) %>%
table_modeltime_accuracy(.interactive = FALSE)| Accuracy Table | ||||||||
|---|---|---|---|---|---|---|---|---|
| .model_id | .model_desc | .type | mae | mape | mase | smape | rmse | rsq |
| 1 | LM - Spline Recipe | Test | 1.07 | 120.38 | 0.83 | 146.55 | 1.40 | 0.00 |
| 2 | LM - Lag Recipe | Test | 0.82 | 126.80 | 0.64 | 111.63 | 1.20 | 0.24 |
| 3 | GLMNET - Lag Recipe | Test | 0.85 | 101.63 | 0.66 | 122.29 | 1.25 | 0.20 |
# Per-series accuracy for each of the three models
calibration_tbl %>%
modeltime_accuracy(acc_by_id = TRUE) %>%
table_modeltime_accuracy(.interactive = FALSE)| Accuracy Table | |||||||||
|---|---|---|---|---|---|---|---|---|---|
| .model_id | .model_desc | .type | id | mae | mape | mase | smape | rmse | rsq |
| 1 | LM - Spline Recipe | Test | I_Commercial | 0.96 | 134.11 | 0.83 | 143.39 | 1.13 | 0.00 |
| 1 | LM - Spline Recipe | Test | I_Blue Cross | 1.01 | 74.17 | 1.63 | 121.20 | 1.15 | 0.10 |
| 1 | LM - Spline Recipe | Test | O_Blue Cross | 1.29 | 136.66 | 0.87 | 152.40 | 1.99 | 0.02 |
| 1 | LM - Spline Recipe | Test | O_Commercial | 1.00 | 137.83 | 1.13 | 170.97 | 1.13 | 0.00 |
| 2 | LM - Lag Recipe | Test | I_Commercial | 0.98 | 262.71 | 0.85 | 143.46 | 1.13 | 0.00 |
| 2 | LM - Lag Recipe | Test | I_Blue Cross | 0.45 | 47.45 | 0.73 | 35.96 | 0.52 | 0.17 |
| 2 | LM - Lag Recipe | Test | O_Blue Cross | 1.21 | 116.90 | 0.82 | 153.75 | 1.89 | 0.32 |
| 2 | LM - Lag Recipe | Test | O_Commercial | 0.62 | 76.53 | 0.70 | 113.50 | 0.79 | 0.12 |
| 3 | GLMNET - Lag Recipe | Test | I_Commercial | 0.94 | 177.95 | 0.82 | 140.03 | 1.09 | 0.17 |
| 3 | GLMNET - Lag Recipe | Test | I_Blue Cross | 0.48 | 43.78 | 0.78 | 39.49 | 0.57 | 0.32 |
| 3 | GLMNET - Lag Recipe | Test | O_Blue Cross | 1.25 | 96.74 | 0.84 | 166.45 | 1.99 | 0.46 |
| 3 | GLMNET - Lag Recipe | Test | O_Commercial | 0.71 | 87.00 | 0.80 | 144.80 | 0.85 | 0.09 |
# Test-set forecast for every model, with per-series confidence intervals
calibration_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = data_prepared_tbl,
    conf_by_id  = TRUE
  ) %>%
  group_by(id) %>%
plot_modeltime_forecast(.facet_ncol = 2)

refit_tbl <- calibration_tbl %>%
  modeltime_refit(data = data_prepared_tbl)

refit_tbl %>%
# Final future forecast from the refit 3-model table, inverted back to
# original units before plotting.
modeltime_forecast(new_data = forecast_tbl,
actual_data = data_prepared_tbl,
conf_by_id = TRUE) %>%
group_by(id) %>%
# Invert Transformation
# NOTE(review): as above, the length-4 `mean`/`sd` vectors recycle across
# rows within each group rather than picking the group's own parameters —
# confirm the mean_a..mean_d / sd_a..sd_d ordering, or join the
# standardization parameters by `id` instead.
mutate(across(
.value:.conf_hi,
.fns = ~ standardize_inv_vec(
x = .,
mean = c(mean_a, mean_b, mean_c, mean_d),
sd = c(sd_a, sd_b, sd_c, sd_d)
)
)) %>%
# Undo the log transform (presumably applied before standardization —
# TODO confirm the preprocessing recipe order)
mutate(across(.value:.conf_hi, .fns = exp)) %>%
plot_modeltime_forecast(.facet_ncol = 2)